
R version 4.2.2 (2022-10-31 ucrt) -- "Innocent and Trusting"
Copyright (C) 2022 The R Foundation for Statistical Computing
Platform: x86_64-w64-mingw32/x64 (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> library(stm)
stm v1.3.6 successfully loaded. See ?stm for help. 
 Papers, resources, and other materials at structuraltopicmodel.com
> library(quanteda)
Package version: 3.2.4
Unicode version: 13.0
ICU version: 69.1
Parallel computing: 8 of 8 threads used.
See https://quanteda.io for tutorials and examples.
> library(quanteda.textplots)
> library(quanteda.textstats)
> library(quanteda.corpora)
> library(stopwords)
> library(ggplot2)
> library(patchwork)
> ######################
> # load and prep data #
> ######################
> ## set working directory - insert the relevant path in the next line (and drop the # sign)
>   # setwd("[working directory path]")
> ## load data: read the processed text corpus CSV (path is relative to the working directory)
> corpus.cov <- read.csv("BSG & Yakter PSRM - text corpus processed.csv")
> ## create corpus and token objects: document ids come from column "uid", texts from column "trim"; non-unique ids are permitted (unique_docnames = FALSE)
> dat.corpus <- corpus(corpus.cov, docid_field = "uid", text_field = "trim", unique_docnames = FALSE)
> dat.tok <- tokens(dat.corpus)
> # use quanteda's suggestion for (additional) Hebrew optimization: drop numbers (punctuation removal is off), keep only tokens made up entirely of Hebrew-script characters, then remove the "marimo" Hebrew stopwords (with min_nchar = 2)
> dat.tok <- tokens(dat.tok, remove_punct = FALSE, remove_numbers = TRUE) %>% 
+   tokens_select(pattern = "^[\\p{script=Hebr}]+$", valuetype = "regex") %>% 
+   tokens_remove(pattern = stopwords("he", source = "marimo"), min_nchar = 2)
> print(dat.tok[2], max_ndoc = 1, max_ntoken = -1)
Tokens consisting of 1 document and 7 docvars.
105195013.1 :
[1] "טרור"  "גדל"   "עכר"   "ישראל"

> 
> ## create document-feature matrix (dfm) from the filtered tokens, then keep only features of at least 3 characters
> dat.dfm <- dfm(dat.tok)
> dat.dfm <- dfm_keep(dat.dfm, min_nchar = 3)
> ##################################################
> # keyness - violence vs. non-violence (Figure 4) #
> ##################################################
> ## run analysis: keyness of each term in dat.dfm, with documents where tr_violence == 1 as the target group; then take the first and last 20 rows of the result
> tstat_key.vl <- textstat_keyness(dat.dfm, docvars(dat.corpus, "tr_violence") == 1)
> tstat_key.vl.top <- head(tstat_key.vl, 20)
> tstat_key.vl.bottom <- tail(tstat_key.vl, 20)
> 
> ## save files for manual translation to English (translate only top + bottom 20 words with minimum count of 5); NOTE(review): no count filter is applied in the code here, so the minimum count of 5 is presumably enforced by hand during translation - confirm
> write.csv(tstat_key.vl.top,"BSG & Yakter PSRM - keyness heb violence.csv", row.names = FALSE)
> write.csv(tstat_key.vl.bottom,"BSG & Yakter PSRM - keyness heb nonviolence.csv", row.names = FALSE)
> ## after manual translation, load and plot the English version of the results
> # load keyness scores with translation (these "eng" CSVs are not written by this script; they must already exist in the working directory)
> tstat_key.vl.eng <- read.csv("BSG & Yakter PSRM - keyness eng violence.csv")
> tstat_key.nvl.eng <- read.csv("BSG & Yakter PSRM - keyness eng nonviolence.csv")
> # omit terms without English translation (i.e., below top 20 or with <5 count): rows whose `english` value is an empty string are dropped; rows where it is NA are kept
> tstat_key.vl.eng <- tstat_key.vl.eng[!(!is.na(tstat_key.vl.eng$english) & tstat_key.vl.eng$english==""), ]
> tstat_key.nvl.eng <- tstat_key.nvl.eng[!(!is.na(tstat_key.nvl.eng$english) & tstat_key.nvl.eng$english==""), ]
> # order words by their chi2 values for the plot; NOTE(review): the factor levels are read by position from column 2, which presumably is the `english` column of the translated CSV - confirm
> tstat_key.vl.eng$english <- factor(tstat_key.vl.eng$english, levels = tstat_key.vl.eng[order(tstat_key.vl.eng$chi2), 2])
> tstat_key.nvl.eng$english <- factor(tstat_key.nvl.eng$english, levels = tstat_key.nvl.eng[order(tstat_key.nvl.eng$chi2), 2])
> 
> ## plot: one bar chart per condition, English term on the y axis and chi2 on the x axis
> # violence condition (black-and-white theme, no grid lines, no y-axis title)
> plot.key.vl <- ggplot(data=tstat_key.vl.eng, aes(y=english,x=chi2)) +
+   geom_col() +
+   labs(x="Keyness Score (Chi Square)",title = "Violence Condition") +
+   theme_bw() +
+   theme(legend.position = "none",
+         panel.grid = element_blank(),
+         plot.title = element_text(size=11),
+         axis.title.y = element_blank(),
+   )
> # non-violence condition (same layout as the violence panel)
> plot.key.nvl <- ggplot(data=tstat_key.nvl.eng, aes(y=english,x=chi2)) +
+   geom_col() +
+   labs(x="Keyness Score (Chi Square)",title = "Non-violence Condition") +
+   theme_bw() +
+   theme(legend.position = "none",
+         panel.grid = element_blank(),
+         plot.title = element_text(size=11),
+         axis.title.y = element_blank(),
+   )
> plot.key.vl.both <- plot.key.vl + plot.key.nvl
> # save the combined two-panel plot (violence panel first) as "Figure 4.pdf"
> ggsave(file = "Figure 4.pdf", plot=plot.key.vl.both, width=6.5, height=4)
> #################################################
> # keyness - left-wing vs. right-wing (Figure 5) #
> #################################################
> # sub-sample of nonviolent treatments only (tr_violence == 0); NOTE(review): this dfm is rebuilt with default tokens() on the corpus subset, so the Hebrew-script filter, stopword removal and min_nchar steps applied to dat.tok/dat.dfm are not applied here - confirm this is intended
> dat.cor.nv <- corpus_subset(dat.corpus, tr_violence == 0)
> dfm.nv <- dfm(tokens(dat.cor.nv))
> 
> # run analysis - right-wing respondents (documents with vote_bloc_rcl == 3 are the target group)
> tstat_key.rgt <- textstat_keyness(dfm.nv, docvars(dat.cor.nv, "vote_bloc_rcl") == 3)
> # run analysis - left-wing respondents (documents with vote_bloc_rcl == 1 are the target group)
> tstat_key.lft <- textstat_keyness(dfm.nv, docvars(dat.cor.nv, "vote_bloc_rcl") == 1)
> 
> ## save files for manual translation to English (translate only top 20 words with minimum count of 5); note that the full keyness tables are written here, not just the top 20 rows
> write.csv(tstat_key.rgt,"BSG & Yakter PSRM - keyness heb right.csv", row.names = FALSE)
> write.csv(tstat_key.lft,"BSG & Yakter PSRM - keyness heb left.csv", row.names = FALSE)
> 
> ## after manual translation, load and plot the English version of the results
> # load keyness scores with translation (these "eng" CSVs are not written by this script; they must already exist in the working directory)
> tstat_key.rgt.eng <- read.csv("BSG & Yakter PSRM - keyness eng right.csv")
> tstat_key.lft.eng <- read.csv("BSG & Yakter PSRM - keyness eng left.csv")
> # omit terms without English translation (i.e., below top 20 or with <5 count): rows whose `english` value is an empty string are dropped; rows where it is NA are kept
> tstat_key.rgt.eng <- tstat_key.rgt.eng[!(!is.na(tstat_key.rgt.eng$english) & tstat_key.rgt.eng$english==""), ]
> tstat_key.lft.eng <- tstat_key.lft.eng[!(!is.na(tstat_key.lft.eng$english) & tstat_key.lft.eng$english==""), ]
> # order words by their chi2 values for the plot; NOTE(review): the factor levels are read by position from column 2, which presumably is the `english` column of the translated CSV - confirm
> tstat_key.rgt.eng$english <- factor(tstat_key.rgt.eng$english, levels = tstat_key.rgt.eng[order(tstat_key.rgt.eng$chi2), 2])
> tstat_key.lft.eng$english <- factor(tstat_key.lft.eng$english, levels = tstat_key.lft.eng[order(tstat_key.lft.eng$chi2), 2])
> 
> ## plot: one bar chart per respondent group, English term on the y axis and chi2 on the x axis
> # right-wingers: x axis fixed to 0-10 with breaks every 2; NOTE(review): the left-wing panel sets no limits, and values outside 0-10 would presumably be dropped by the scale - confirm all chi2 values are within range
> plot.key.rgt <- ggplot(data=tstat_key.rgt.eng, aes(y=english,x=chi2)) +
+   geom_col() +
+   labs(x="Keyness Score (Chi Square)",title = "Right-wing Respondents") +
+   scale_x_continuous(limits = c(0,10), breaks=seq(0,10,by=2)) +
+     theme_bw() +
+   theme(legend.position = "none",
+         panel.grid = element_blank(),
+         plot.title = element_text(size=11),
+         axis.title.y = element_blank(),
+   )
> # left-wingers (default x-axis scale)
> plot.key.lft <- ggplot(data=tstat_key.lft.eng, aes(y=english,x=chi2)) +
+   geom_col() +
+   labs(x="Keyness Score (Chi Square)",title = "Left-wing Respondents") +
+   theme_bw() +
+   theme(legend.position = "none",
+         panel.grid = element_blank(),
+         plot.title = element_text(size=11),
+         axis.title.y = element_blank(),
+   )
> # combine: left-wing panel first, then right-wing; the result is saved below as "Figure 5.pdf"
> plot.key.both <- plot.key.lft + plot.key.rgt
> 
> ggsave(file = "Figure 5.pdf", plot=plot.key.both, width=6.5, height=4)
> 
> ############################################################
> # robustness - manipulation check (Table A4)               #
> # top 12 Hebrew words by action-type treatment (tr_action) #
> ############################################################
> textstat_frequency(dat.dfm, n = 12, groups = tr_action)
   feature frequency rank docfreq group
1     טרור       318    1     288     1
2     אזרח        93    2      89     1
3      רצח        62    3      59     1
4      פשע        60    4      58     1
5    פעולה        57    5      44     1
6    יהודי        47    6      38     1
7      פגע        44    7      44     1
8      הרג        44    7      43     1
9      דבר        37    9      36     1
10    הרוג        37    9      35     1
11   מדינה        30   11      27     1
12   פגיעה        28   12      27     1
13    טרור       272    1     220     2
14   ישראל        97    2      88     2
15   מדינה        65    3      59     2
16   פעולה        52    4      43     2
17    מהלך        47    5      46     2
18    כלכל        43    6      41     2
19     פגע        41    7      41     2
20   פגיעה        40    8      39     2
21     דבר        37    9      34     2
22     חרם        35   10      34     2
23  אלימות        25   11      23     2
24     דעה        24   12      21     2
25    טרור       213    1     181     3
26   ישראל        98    2      84     3
27   מדינה        74    3      60     3
28   פעולה        42    4      42     3
29    מהלך        40    5      39     3
30     אין        34    6      33     3
31     פגע        33    7      31     3
32     רצה        32    8      26     3
33     דעה        28    9      28     3
34     חשב        25   10      24     3
35    עולם        23   11      21     3
36   יהודי        22   12      20     3
37    טרור       203    1     173     4
38   ישראל        57    2      53     4
39   מדינה        53    3      48     4
40   פעולה        47    4      44     4
41     שטח        42    5      34     4
42   בנייה        40    6      35     4
43    חוקה        39    7      38     4
44     חוק        36    8      33     4
45   יהודי        35    9      26     4
46    בניה        25   10      24     4
47     חשב        25   10      24     4
48    מהלך        24   12      23     4
> # output is in Hebrew; translated manually